Contents

library(Matrix)
library(scran)
library(Rtsne)
library(irlba)
library(cowplot)
library(biomaRt)

source("/nfs/research1/marioni/jonny/chimera-t/scripts/core_functions.R")
load_data(remove_doublets = TRUE, remove_stripped = TRUE, remove_pool2 = TRUE)

nPC = 50

In this script, we perform batch correction on our data.

1 Batch correction

For batch correction, we employ the scran function fastMNN, which performs batch correction in the manner of mnnCorrect, but in the PC-space, and much faster. Critically, this is a composition-aware batch-correction strategy that should not be affected by the lack of e.g. extraembryonic tissue in the Tomato+ samples (as the injected stem cells do not contribute to these lineages). We correct within each timepoint only.

1.1 Total correction

hvgs_E7.5 = getHVGs(scater::normalize(sce[,meta$stage == "E7.5"]))
hvgs_E8.5 = getHVGs(scater::normalize(sce[,meta$stage == "E8.5"]))

tab = table(meta$sample[meta$stage == "E7.5"])

#perform batch correction within each genotype, then across the genotypes
correct1 = doBatchCorrect(counts = logcounts(scater::normalize(sce[hvgs_E7.5, meta$stage == "E7.5"])), 
                          timepoints = meta$tomato[meta$stage == "E7.5"], 
                          samples = meta$sample[meta$stage == "E7.5"], 
                          timepoint_order = as.logical(c("TRUE", "FALSE")), 
                          sample_order = as.numeric(names(tab[order(tab, decreasing = TRUE)])))
tab = table(meta$sample[meta$stage == "E8.5"])

correct2 = doBatchCorrect(counts = logcounts(scater::normalize(sce[hvgs_E8.5, meta$stage == "E8.5"])), 
                          timepoints = meta$tomato[meta$stage == "E8.5"], 
                          samples = meta$sample[meta$stage == "E8.5"], 
                          timepoint_order = as.logical(c("TRUE", "FALSE")), 
                          sample_order = as.numeric(names(tab[order(tab, decreasing = TRUE)])))
corrected = list("E7.5" = correct1,
                 "E8.5" = correct2)

saveRDS(corrected, file = "/nfs/research1/marioni/jonny/chimera-t/data/corrected_pcas.rds")

A t-SNE visualisation of our cells, pre- and post-correction, is shown in Figure 1.

base_7.5 = prcomp_irlba(t(logcounts(scater::normalize(sce[hvgs_E7.5, meta$stage == "E7.5"]))), n = nPC)$x
base_8.5 = prcomp_irlba(t(logcounts(scater::normalize(sce[hvgs_E8.5, meta$stage == "E8.5"]))), n = nPC)$x

tsne_pre_7.5 = Rtsne(base_7.5, pca = FALSE)$Y
tsne_post_7.5 = Rtsne(corrected$E7.5, pca = FALSE)$Y

tsne_pre_8.5 = Rtsne(base_8.5, pca = FALSE)$Y
tsne_post_8.5 = Rtsne(corrected$E8.5, pca = FALSE)$Y

ro1 = sample(nrow(base_7.5), nrow(base_7.5))
ro2 = sample(nrow(base_8.5), nrow(base_8.5))

p1 = ggplot(as.data.frame(tsne_pre_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E7.5"][ro1]))) +
  geom_point(size = 0.4) +
  scale_colour_manual(values = sample_colours, name = "Sample") +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E7.5, pre-correction") +
  labs(x= "tSNE1", y= "tSNE2")

p2 = ggplot(as.data.frame(tsne_post_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E7.5"][ro1]))) +
  geom_point(size = 0.4) +
  scale_colour_manual(values = sample_colours, name = "Sample") +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E7.5, post-correction") +
  labs(x= "tSNE1", y= "tSNE2")

p3 = ggplot(as.data.frame(tsne_pre_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E8.5"][ro2]))) +
  geom_point(size = 0.4) +
  scale_colour_manual(values = sample_colours, name = "Sample") +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E8.5, pre-correction") +
  labs(x= "tSNE1", y= "tSNE2")

p4 = ggplot(as.data.frame(tsne_post_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$sample[meta$stage == "E8.5"][ro2]))) +
  geom_point(size = 0.4) +
  scale_colour_manual(values = sample_colours, name = "Sample") +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E8.5, post-correction") +
  labs(x= "tSNE1", y= "tSNE2")

plot_grid(p1, p2, p3, p4)
t-SNE of cells before and after correction. Cells in red colours are Tomato+ (injected), cells in grey colours are Tomato- (embryo). Different shades of colour indicate difference samples.

Figure 1: t-SNE of cells before and after correction
Cells in red colours are Tomato+ (injected), cells in grey colours are Tomato- (embryo). Different shades of colour indicate difference samples.

2 Celltype-coloured tSNE

Figure 2 shows the same plots, but coloured by the mapped celltype (see the mapping script for details).

tsne_7.5 = Rtsne(corrected[["E7.5"]], pca = FALSE)$Y
tsne_8.5 = Rtsne(corrected[["E8.5"]], pca = FALSE)$Y

ro1 = sample(sum(meta$stage == "E7.5"), sum(meta$stage == "E7.5"))
ro2 = sample(sum(meta$stage == "E8.5"), sum(meta$stage == "E8.5"))

plegend = ggplot(as.data.frame(tsne_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
  geom_point() +
  scale_colour_manual(values = celltype_colours, drop = FALSE, name = "") +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank())  +
  guides(col = guide_legend(override.aes = list(size = 5), ncol = 3))

p1 = ggplot(as.data.frame(tsne_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
  geom_point() +
  scale_colour_manual(values = celltype_colours, name = "") +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E7.5") +
  labs(x = "tSNE1", y = "tSNE2")


p2 = ggplot(as.data.frame(tsne_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E8.5"][ro2], levels = names(celltype_colours), ordered = TRUE))) +
  geom_point() +
  scale_colour_manual(values = celltype_colours) +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E8.5") +
  labs(x = "tSNE1", y = "tSNE2")


plot_grid(p1, get_legend(plegend), p2, ncol = 1, rel_heights = c(1,0.6,1))
t-SNEs, coloured by celltype.

Figure 2: t-SNEs, coloured by celltype

3 Celltype-coloured UMAP

Finally, we generate UMAP coordinates of the batch-corrected data.

umap_7.5 = getUMAP(corrected[["E7.5"]])


umap_8.5 = getUMAP(corrected[["E8.5"]])
p1 = ggplot(as.data.frame(umap_7.5)[ro1,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E7.5"][ro1], levels = names(celltype_colours), ordered = TRUE))) +
  geom_point(size = 0.3) +
  scale_colour_manual(values = celltype_colours) +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E7.5") +
  labs(x = "UMAP1", y = "UMAP2")


p2 = ggplot(as.data.frame(umap_8.5)[ro2,], aes(x = V1, y = V2, col = factor(meta$celltype.mapped[meta$stage == "E8.5"][ro2], levels = names(celltype_colours), ordered = TRUE))) +
  geom_point(size = 0.3) +
  scale_colour_manual(values = celltype_colours) +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  ggtitle("E8.5") +
  labs(x = "UMAP1", y = "UMAP2")

plot_grid(p1, get_legend(plegend), p2, ncol = 1, rel_heights = c(1,0.6,1))

ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_7.5.pdf",
       width = 5, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_8.5.pdf",
       width = 5, height = 5)

saveRDS(list("E7.5" = umap_7.5,
             "E8.5" = umap_8.5),
        file = "/nfs/research1/marioni/jonny/chimera-t/data/umaps.rds")

And below, the cells are split by their Tomato status.

meta$tomatoText = ifelse(meta$tomato, "Tom+", "Tom-")

p1 = ggplot(data = meta[meta$stage == "E7.5",], 
            mapping = aes(x = umap_7.5[ro1,1], 
                          y = umap_7.5[ro1,2], 
                          col = factor(celltype.mapped[ro1], 
                                       levels = names(celltype_colours), 
                                       ordered = TRUE))) +
  geom_point(size = 0.3) +
  scale_colour_manual(values = celltype_colours) +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank()) +
  ggtitle("E7.5") +
  facet_wrap(~tomatoText[ro1])

p2 = ggplot(data = meta[meta$stage == "E8.5",], 
            mapping = aes(x = umap_8.5[ro2,1], 
                          y = umap_8.5[ro2,2], 
                          col = factor(celltype.mapped[ro2], 
                                       levels = names(celltype_colours), 
                                       ordered = TRUE))) +
  geom_point(size = 0.3) +
  scale_colour_manual(values = celltype_colours) +
  theme(legend.position = "none",
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank()) +
  ggtitle("E8.5") +
  facet_wrap(~tomatoText[ro2])

plot_grid(p1, p2, ncol = 1)

ggsave(p1, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_7.5_split.pdf",
       width = 10, height = 5)
ggsave(p2, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/umap_8.5_split.pdf",
       width = 10, height = 5)

3.1 Clone differences

These chimeras contain two differenct clones of the T knockout (i.e., different disruptions of the T genes). These are shown in Figure 3 for all samples.

meta$clone = sapply(meta$sample,
                    switch,
                    "B6KO",
                    "B6HOST",
                    "B6KO",
                    "B6HOST",
                    "B6KO",
                    "B6HOST",
                    "B6KO",
                    "B6HOST",
                    "C6KO",
                    "C6HOST",
                    "B6KO",
                    "B6HOST",
                    "C6KO",
                    "C6HOST",
                    "C6KO",
                    "C6HOST")

meta_8.5= meta[meta$stage == "E8.5",]
plots_8.5 = lapply(unique(meta_8.5$clone), function(x){
  ggplot() +
    geom_point(mapping = aes(x = umap_8.5[,1], 
                             y = umap_8.5[,2]), 
               col = "lightgrey") +
    geom_point(mapping = aes(x = umap_8.5[meta_8.5$clone==x,1], 
                             y = umap_8.5[meta_8.5$clone==x,2], 
                             col = meta_8.5$celltype.mapped[meta_8.5$clone==x])) +
    scale_color_manual(values = celltype_colours) +
    theme(legend.position = "none",
          axis.text = element_blank(),
          axis.ticks = element_blank()) +
    labs(x = "UMAP1", y = "UMAP2") +
    ggtitle(paste0(x, " (E8.5)"))
})

meta_7.5= meta[meta$stage == "E7.5",]
plots_7.5 = lapply(unique(meta_7.5$clone), function(x){
  ggplot() +
    geom_point(mapping = aes(x = umap_7.5[,1], 
                             y = umap_7.5[,2]), 
               col = "lightgrey") +
    geom_point(mapping = aes(x = umap_7.5[meta_7.5$clone==x,1], 
                             y = umap_7.5[meta_7.5$clone==x,2], 
                             fill = meta_7.5$celltype.mapped[meta_7.5$clone==x]),
    pch = 21, col = "black") +
    scale_fill_manual(values = celltype_colours) +
    theme(legend.position = "none",
          axis.text = element_blank(),
          axis.ticks = element_blank()) +
    labs(x = "UMAP1", y = "UMAP2") +
    ggtitle(paste0(x, " (E7.5)"))
})

grid = plot_grid(plotlist= c(plots_7.5, plots_8.5), ncol = 2)

grid
Location of different clones in the UMAPs

Figure 3: Location of different clones in the UMAPs

save_plot(grid, file = "/nfs/research1/marioni/jonny/chimera-t/scripts/batch_correct/clone.pdf", base_width = 8, base_height = 16)

4 Session Info

sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
## 
## locale:
## [1] C
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] scales_1.0.0                scater_1.10.1              
##  [3] knitr_1.23                  biomaRt_2.38.0             
##  [5] cowplot_0.9.4               ggplot2_3.1.1              
##  [7] irlba_2.3.3                 Rtsne_0.15                 
##  [9] scran_1.10.2                SingleCellExperiment_1.4.1 
## [11] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
## [13] matrixStats_0.54.0          Biobase_2.42.0             
## [15] GenomicRanges_1.34.0        GenomeInfoDb_1.18.2        
## [17] IRanges_2.16.0              S4Vectors_0.20.1           
## [19] BiocGenerics_0.28.0         BiocParallel_1.16.6        
## [21] Matrix_1.2-17               BiocStyle_2.10.0           
## [23] rmarkdown_1.13             
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6             bit64_0.9-7             
##  [3] RcppAnnoy_0.0.12         RColorBrewer_1.1-2      
##  [5] progress_1.2.2           httr_1.4.0              
##  [7] dynamicTreeCut_1.63-1    tools_3.5.3             
##  [9] R6_2.4.0                 HDF5Array_1.10.1        
## [11] vipor_0.4.5              uwot_0.1.3              
## [13] DBI_1.0.0                lazyeval_0.2.2          
## [15] colorspace_1.4-1         withr_2.1.2             
## [17] tidyselect_0.2.5         gridExtra_2.3           
## [19] prettyunits_1.0.2        bit_1.1-14              
## [21] compiler_3.5.3           BiocNeighbors_1.0.0     
## [23] labeling_0.3             bookdown_0.11           
## [25] stringr_1.4.0            digest_0.6.19           
## [27] XVector_0.22.0           pkgconfig_2.0.2         
## [29] htmltools_0.3.6          highr_0.8               
## [31] limma_3.38.3             rlang_0.3.4             
## [33] RSQLite_2.1.1            DelayedMatrixStats_1.4.0
## [35] dplyr_0.8.1              RCurl_1.95-4.12         
## [37] magrittr_1.5             GenomeInfoDbData_1.2.0  
## [39] Rcpp_1.0.1               ggbeeswarm_0.6.0        
## [41] munsell_0.5.0            Rhdf5lib_1.4.3          
## [43] viridis_0.5.1            stringi_1.4.3           
## [45] yaml_2.2.0               edgeR_3.24.3            
## [47] zlibbioc_1.28.0          rhdf5_2.26.2            
## [49] plyr_1.8.4               grid_3.5.3              
## [51] blob_1.1.1               crayon_1.3.4            
## [53] lattice_0.20-38          hms_0.4.2               
## [55] locfit_1.5-9.1           pillar_1.4.1            
## [57] igraph_1.2.4.1           codetools_0.2-16        
## [59] reshape2_1.4.3           XML_3.98-1.20           
## [61] glue_1.3.1               evaluate_0.14           
## [63] RcppParallel_4.4.3       BiocManager_1.30.4      
## [65] gtable_0.3.0             purrr_0.3.2             
## [67] assertthat_0.2.1         xfun_0.7                
## [69] RSpectra_0.15-0          viridisLite_0.3.0       
## [71] tibble_2.1.3             AnnotationDbi_1.44.0    
## [73] beeswarm_0.2.3           memoise_1.1.0           
## [75] statmod_1.4.32